
import facebook
import requests
import json
import time
import urllib

# Facebook API endpoints:
# https://developers.facebook.com/docs/graph-api/reference/v3.2/group/albums
# https://developers.facebook.com/docs/graph-api/reference/v3.2/album/photos


########################################################################################################################

def get_albums(api, group_id):
	albums = api.get_connections(group_id, 'albums?fields=id,name,created_time')
	all_album_objects = []
	req_count = 0

	while True:
		try:
			albums_req = [i for i in albums["data"]]
			all_album_objects.extend(albums_req)
			req_count += 1

			print "%d \t | Got %d albums \t | TOTAL: %d" % (req_count, len(albums_req), len(all_album_objects))

			# Attempt to make a request to the next page of data, if it exists.
			albums = requests.get(albums['paging']['next']).json()
		except KeyError:
			# No more pages
			break

	# write albums to file
	fout = open("../data/%s_albums.json" % group_id, "w")
	json.dump(all_album_objects, fout, indent=4, sort_keys=True)
	print "Albums dumped to: data/%s_albums.json" % group_id

########################################################################################################################

def get_album_photos(api, album_id):
	photos = api.get_connections(album_id, 'photos?fields=id,images,updated_time,name_tags')
	all_photo_objects = []
	print "Photos --- %s |" % album_id,

	while True:
		try:
			photos_req = [i for i in photos["data"]]
			all_photo_objects.extend(photos_req)
			print " -%d- " % len(photos_req),

			# Attempt to make a request to the next page of data, if it exists.
			photos = requests.get(photos['paging']['next']).json()
		except KeyError:
			# No more pages
			break

	print ""

	#fout = open("data/test.json", "w")
	#json.dump(all_photo_objects, fout, indent=4, sort_keys=True)

	return all_photo_objects

########################################################################################################################

def get_all_photos(api, group_id):
	# read album json
	albums = json.load(open("../data/%s_albums.json" % group_id))
	fout = open("../data/%s_all_photo_urls.txt" % group_id, "w", 1)

	for i, album in enumerate(albums):
		print "%d / %d | Album: %s" % (i, len(albums), album)
		photos = get_album_photos(api, album["id"])

		for photo in photos:
			photo_url = get_url_of_max_img(photo["images"])
			fout.write("%s||%s||%s||%s||%s\n" % (album["id"], album["created_time"], album["name"], photo["id"], photo_url))

		print ""

		time.sleep(1)

	fout.close()

########################################################################################################################

def download_images(group_id):
	fin = open("../data/%s_all_photo_urls.txt" % group_id)
	ferr = open("../data/%s_errors.txt" % group_id, "w")

	for i, line in enumerate(fin):
		try:
			print i,
			parts = line.strip().split("||")
			p_id = parts[3]
			p_url = parts[4]

			urllib.urlretrieve(p_url, "photos/%s.jpg" % p_id)
			print " ... downloaded"

			#time.sleep(1)
		except:
			print " ... FAILED"
			ferr.write("%d\n" % i)

	print "Done!"

########################################################################################################################

def get_url_of_max_img(imgs):
	"""Return the ``source`` of the entry in ``imgs`` with the largest ``width``.

	Args:
		imgs: Non-empty iterable of mappings that each provide ``width`` and
			``source`` keys.

	Returns:
		The ``source`` value of the widest entry; when several entries share
		the largest width, the first of them wins.

	Raises:
		ValueError: If ``imgs`` is empty.
	"""
	def width_of(img):
		return img["width"]

	return max(imgs, key=width_of)["source"]

########################################################################################################################

#
# MAIN
#
def main():
	"""Build the Graph API client and run the crawl for one group."""
	# Access tokens can be obtained at:
	# https://developers.facebook.com/tools/explorer/
	api = facebook.GraphAPI('***')

	# Group ids this script has been used with, keyed by a short label.
	known_groups = {
		"NYC": "467048970077836",
		"BOS": "772881386093091",
		"TX": "589993821132754",
		"AZ": "310631999138438",
	}
	group_id = known_groups["AZ"]

	# Crawl stages; only the album listing is currently enabled.
	get_albums(api, group_id)
	# get_all_photos(api, group_id)
	# download_images(group_id)


# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# END
